package com.ndood.spider.sample.processor;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * Default page processor.
 *
 * <p>Stores the JSON view of every processed page under a single field and provides the
 * {@link Site} settings (charset, retry count, sleep time and request headers) for the crawl.
 */
public class SamplePageProcessor implements PageProcessor {

	/** Field key passed to {@link Page#putField} for the page's JSON view. */
	private static final String RESULT_FIELD = "test";

	/** Charset configured on the site. */
	private static final String CHARSET = "UTF-8";

	/** Value handed to {@link Site#setCycleRetryTimes}. */
	private static final int CYCLE_RETRY_TIMES = 3;

	/** Value handed to {@link Site#setSleepTime}; presumably milliseconds (3 s) - confirm. */
	private static final int SLEEP_TIME = 3 * 1000;

	/** User-Agent header value sent with each request. */
	private static final String USER_AGENT =
			"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0";

	/** URL the sample spider in {@link #main} starts from. */
	private static final String START_URL = "http://my.oschina.net/flashsword/blog";

	/**
	 * Records the page's JSON view ({@link Page#getJson()}) under the {@code "test"} field.
	 *
	 * @param page the page handed over by the spider
	 */
	@Override
	public void process(Page page) {
		Object jsonView = page.getJson();
		page.putField(RESULT_FIELD, jsonView);
	}

	/**
	 * Builds the site configuration for this processor.
	 *
	 * <p>A new {@link Site} is created on every call.
	 *
	 * @return a site configured with charset, cycle-retry count, sleep time and the
	 *         {@code Connection}, {@code Cache-Control} and {@code User-Agent} headers
	 */
	@Override
	public Site getSite() {
		Site config = Site.me();
		config.setCharset(CHARSET);
		config.setCycleRetryTimes(CYCLE_RETRY_TIMES);
		config.setSleepTime(SLEEP_TIME);
		config.addHeader("Connection", "keep-alive");
		config.addHeader("Cache-Control", "max-age=0");
		config.addHeader("User-Agent", USER_AGENT);
		return config;
	}

	/**
	 * Runs a spider with this processor against the sample start URL, sending results to a
	 * {@link ConsolePipeline}.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		Spider spider = Spider.create(new SamplePageProcessor());
		spider.addUrl(START_URL);
		spider.addPipeline(new ConsolePipeline());
		spider.run();
	}

}
